suppressPackageStartupMessages(library(tidyverse))
## Warning: package 'ggplot2' was built under R version 4.2.3
## Warning: package 'tidyr' was built under R version 4.2.3
## Warning: package 'readr' was built under R version 4.2.3
## Warning: package 'dplyr' was built under R version 4.2.3
## Warning: package 'stringr' was built under R version 4.2.3
devtools::load_all('~/Google Drive/My Drive/Scripts/R_packages/myUtilities/')
## ℹ Loading myUtilities

# Settings ----

# Directory on an external volume; it is not referenced again in the visible
# part of this file (presumably the raw NGS data location -- confirm).
data_dir <- '/Volumes/Mitsu_NGS_3/METTL2A/'

# Analysis root. Every input/output path below is built by prefixing this
# string, so the trailing slash is required.
wd <- "/Users/s-mitsutomi/My Drive (shuheimitsutomi@ric.u-tokyo.ac.jp)/Analysis/METTL2A/"
# Changes the session's working directory to the analysis root.
setwd(wd)

# Output directories: figdir is passed as `outdir` to
# ggsave_multiple_formats(), tabledir as `outdir` to export_tsv().
figdir <- paste0(wd, 'Figures/Shortread/DETs/')
tabledir <- paste0(wd, 'Tables/Shortread/')

# Global ggplot2 theme for all plots created after this point: classic theme
# with base font size 7 and the legend placed below the panel.
theme_set(
  theme_classic(base_size = 7) +
    theme(legend.position = 'bottom')
)

# Functions ----

#' Add a simplified gene-type label
#'
#' Appends a character column `genetype2` derived from `gene_type`,
#' `gene_name` and `seqname`. Rules are tried in order; the first match wins:
#'   1. protein_coding on chrM                 -> 'mt-mRNA'
#'   2. protein_coding elsewhere               -> 'mRNA'
#'   3. chrM gene whose name contains 'MT-RNR' -> 'mt-rRNA'
#'   4. chrM gene whose name contains 'MT-T'   -> 'mt-tRNA'
#'   5. missing `gene_type`                    -> 'unannotated gene'
#'   6. anything else                          -> 'other ncRNAs'
#'
#' @param df Data frame with columns `gene_type`, `gene_name`, `seqname`.
#' @return `df` with the extra column `genetype2`.
add_genetype2 <- function(df) {
  mutate(
    df,
    genetype2 = {
      # Shared predicates; NA inputs stay NA and therefore never match.
      is_coding <- gene_type == 'protein_coding'
      on_chrM <- seqname == 'chrM'

      case_when(
        is_coding & on_chrM ~ 'mt-mRNA',
        is_coding & !on_chrM ~ 'mRNA',
        on_chrM & grepl('MT-RNR', gene_name) ~ 'mt-rRNA',
        on_chrM & grepl('MT-T', gene_name) ~ 'mt-tRNA',
        is.na(gene_type) ~ 'unannotated gene',
        .default = 'other ncRNAs'
      )
    }
  )
}

#' Flag rows that respond to the two METTL2A knockdowns (G and I)
#'
#' A condition "passes" in a direction when its `*_pvalue` is below `alpha`
#' and its `*_log2FoldChange` has the matching sign. Note that the
#' `*_pvalue` columns are thresholded, not the `*_padj` columns.
#'
#' Added columns (all character):
#'   * `isUp`   - 'common' (both conditions pass with positive fold change),
#'                'only G' / 'only I' (that condition passes but the
#'                'common' rule did not match), otherwise 'not'.
#'   * `isDown` - the same labels for negative fold changes.
#'   * `common_DETs` - 'up' when `isUp` is 'common', 'down' when `isDown` is
#'                'common', otherwise 'other'.
#'
#' Rows with a missing p-value or fold change in one condition cannot be
#' 'common' (the comparison is NA) but can still be 'only G' / 'only I'.
#'
#' The classification is vectorised with `pmax()` / `pmin()`; the earlier
#' row-by-row (`rowwise()`) evaluation produced the same labels but ran
#' `case_when()` once per row.
#'
#' @param df Data frame with columns `siMETTL2A_G_pvalue`,
#'   `siMETTL2A_I_pvalue`, `siMETTL2A_G_log2FoldChange` and
#'   `siMETTL2A_I_log2FoldChange`.
#' @param alpha P-value cutoff. Defaults to .05, the previously hard-coded
#'   value.
#' @return An ungrouped tibble: `df` plus `isUp`, `isDown`, `common_DETs`.
add_isDET <- function(df, alpha = .05) {

  df |>
    # Drop any grouping so the result is a plain tibble, as before.
    as_tibble() |>
    mutate(
      isUp = case_when(
        pmax(siMETTL2A_G_pvalue, siMETTL2A_I_pvalue) < .env$alpha &
          pmin(siMETTL2A_G_log2FoldChange, siMETTL2A_I_log2FoldChange) > 0
        ~ 'common',
        siMETTL2A_G_pvalue < .env$alpha & siMETTL2A_G_log2FoldChange > 0
        ~ 'only G',
        siMETTL2A_I_pvalue < .env$alpha & siMETTL2A_I_log2FoldChange > 0
        ~ 'only I',
        .default = 'not'
      ),
      isDown = case_when(
        pmax(siMETTL2A_G_pvalue, siMETTL2A_I_pvalue) < .env$alpha &
          pmax(siMETTL2A_G_log2FoldChange, siMETTL2A_I_log2FoldChange) < 0
        ~ 'common',
        siMETTL2A_G_pvalue < .env$alpha & siMETTL2A_G_log2FoldChange < 0
        ~ 'only G',
        siMETTL2A_I_pvalue < .env$alpha & siMETTL2A_I_log2FoldChange < 0
        ~ 'only I',
        .default = 'not'
      ),
      # isUp is checked first, matching the original precedence.
      common_DETs = case_when(
        isUp   == 'common' ~ 'up',
        isDown == 'common' ~ 'down',
        .default = 'other'
      )
    )

}

# Read data ----

# Read shortread DESeq2 results ----

# Load the annotated short-read DESeq2 table and move the gene_* annotation
# columns to the front; all other columns keep their relative order.
shortread_DESeq2_results <-
  paste0(wd, 'Tables/shortread_DESeq2_results_annotated_2024-04-05.tsv') |>
  read_tsv() |>
  relocate(starts_with('gene_'))
## Rows: 13418 Columns: 22
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr  (4): gene_id, gene_type, gene_name, seqname
## dbl (18): siMETTL2A_baseMean, siMETTL2A_log2FoldChange, siMETTL2A_lfcSE, siM...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print the table for inspection.
shortread_DESeq2_results
## # A tibble: 13,418 × 22
##    gene_id         gene_type gene_name siMETTL2A_baseMean siMETTL2A_log2FoldCh…¹
##    <chr>           <chr>     <chr>                  <dbl>                  <dbl>
##  1 ENSG0000000045… protein_… SCYL3                  1499.                 0.242 
##  2 ENSG0000000046… protein_… C1orf112               1060.                -1.08  
##  3 ENSG0000000146… protein_… STPG1                  1857.                -0.286 
##  4 ENSG0000000146… protein_… NIPAL3                11719.                 0.534 
##  5 ENSG0000000445… protein_… AK2                   15695.                -0.548 
##  6 ENSG0000000448… protein_… KDM1A                 17975.                 0.353 
##  7 ENSG0000000655… protein_… TTC22                  1441.                 0.845 
##  8 ENSG0000000734… protein_… ST7L                    857.                -0.0295
##  9 ENSG0000000792… protein_… DNAJC11                9679.                 0.411 
## 10 ENSG0000000812… <NA>      <NA>                    413.                 0.231 
## # ℹ 13,408 more rows
## # ℹ abbreviated name: ¹​siMETTL2A_log2FoldChange
## # ℹ 17 more variables: siMETTL2A_lfcSE <dbl>, siMETTL2A_stat <dbl>,
## #   siMETTL2A_pvalue <dbl>, siMETTL2A_padj <dbl>, siMETTL2A_I_baseMean <dbl>,
## #   siMETTL2A_I_log2FoldChange <dbl>, siMETTL2A_I_lfcSE <dbl>,
## #   siMETTL2A_I_stat <dbl>, siMETTL2A_I_pvalue <dbl>, siMETTL2A_I_padj <dbl>,
## #   siMETTL2A_G_baseMean <dbl>, siMETTL2A_G_log2FoldChange <dbl>, …

# Read methylated gene information ----

# One row per gene_id found in the methylated-positions table, each tagged
# with methylation = '+'.
DRS_methylated_genes <-
  paste0(wd, 'Tables/DRS_m3C_sites/methylated_positions_2024-03-29.tsv') |>
  read_tsv() |>
  distinct(gene_id) |>
  mutate(methylation = '+')
## Rows: 632 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (13): transcript_id, kmer, seqname, source, feature, score, strand, fram...
## dbl  (3): position, start, end
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print the gene list for inspection.
DRS_methylated_genes
## # A tibble: 80 × 2
##    gene_id            methylation
##    <chr>              <chr>      
##  1 ENSG00000008988.11 +          
##  2 ENSG00000086548.9  +          
##  3 ENSG00000089009.16 +          
##  4 ENSG00000240972.2  +          
##  5 ENSG00000026025.16 +          
##  6 ENSG00000111640.15 +          
##  7 ENSG00000111775.3  +          
##  8 ENSG00000112306.8  +          
##  9 ENSG00000034510.6  +          
## 10 ENSG00000116251.11 +          
## # ℹ 70 more rows

# Join methylation info with DESeq2 result ----

# Attach the methylation flag to every DESeq2 row. Genes that are absent from
# DRS_methylated_genes get methylation = '-'.
# The join key is stated explicitly so the join cannot silently widen to any
# other column the two tables might come to share; this also silences the
# "Joining with `by = ...`" message.
shortread_DESeq2_results_DRSm3C <-
  shortread_DESeq2_results |>
  left_join(DRS_methylated_genes, by = 'gene_id') |>
  replace_na(list(methylation = '-'))
# Print the joined table for inspection.
shortread_DESeq2_results_DRSm3C
## # A tibble: 13,418 × 23
##    gene_id         gene_type gene_name siMETTL2A_baseMean siMETTL2A_log2FoldCh…¹
##    <chr>           <chr>     <chr>                  <dbl>                  <dbl>
##  1 ENSG0000000045… protein_… SCYL3                  1499.                 0.242 
##  2 ENSG0000000046… protein_… C1orf112               1060.                -1.08  
##  3 ENSG0000000146… protein_… STPG1                  1857.                -0.286 
##  4 ENSG0000000146… protein_… NIPAL3                11719.                 0.534 
##  5 ENSG0000000445… protein_… AK2                   15695.                -0.548 
##  6 ENSG0000000448… protein_… KDM1A                 17975.                 0.353 
##  7 ENSG0000000655… protein_… TTC22                  1441.                 0.845 
##  8 ENSG0000000734… protein_… ST7L                    857.                -0.0295
##  9 ENSG0000000792… protein_… DNAJC11                9679.                 0.411 
## 10 ENSG0000000812… <NA>      <NA>                    413.                 0.231 
## # ℹ 13,408 more rows
## # ℹ abbreviated name: ¹​siMETTL2A_log2FoldChange
## # ℹ 18 more variables: siMETTL2A_lfcSE <dbl>, siMETTL2A_stat <dbl>,
## #   siMETTL2A_pvalue <dbl>, siMETTL2A_padj <dbl>, siMETTL2A_I_baseMean <dbl>,
## #   siMETTL2A_I_log2FoldChange <dbl>, siMETTL2A_I_lfcSE <dbl>,
## #   siMETTL2A_I_stat <dbl>, siMETTL2A_I_pvalue <dbl>, siMETTL2A_I_padj <dbl>,
## #   siMETTL2A_G_baseMean <dbl>, siMETTL2A_G_log2FoldChange <dbl>, …

# Add DET information and custom genetype ----

# Add the DET flags (isUp / isDown / common_DETs) first, then the simplified
# gene-type label (genetype2).
shortread_DESeq2_results_DRSm3C_DETinfo <-
  add_genetype2(add_isDET(shortread_DESeq2_results_DRSm3C))
# export_tsv() is not defined in this file (presumably provided by
# myUtilities). Per the recorded output it writes a dated TSV named after the
# object into `outdir`, so the object is passed under its own name.
export_tsv(shortread_DESeq2_results_DRSm3C_DETinfo, outdir = tabledir)
## 
## Exported to: /Users/s-mitsutomi/My Drive (shuheimitsutomi@ric.u-tokyo.ac.jp)/Analysis/METTL2A/Tables/Shortread/shortread_DESeq2_results_DRSm3C_DETinfo_2024-04-06.tsv
## # A tibble: 13,418 × 27
##    gene_id         gene_type gene_name siMETTL2A_baseMean siMETTL2A_log2FoldCh…¹
##    <chr>           <chr>     <chr>                  <dbl>                  <dbl>
##  1 ENSG0000000045… protein_… SCYL3                  1499.                 0.242 
##  2 ENSG0000000046… protein_… C1orf112               1060.                -1.08  
##  3 ENSG0000000146… protein_… STPG1                  1857.                -0.286 
##  4 ENSG0000000146… protein_… NIPAL3                11719.                 0.534 
##  5 ENSG0000000445… protein_… AK2                   15695.                -0.548 
##  6 ENSG0000000448… protein_… KDM1A                 17975.                 0.353 
##  7 ENSG0000000655… protein_… TTC22                  1441.                 0.845 
##  8 ENSG0000000734… protein_… ST7L                    857.                -0.0295
##  9 ENSG0000000792… protein_… DNAJC11                9679.                 0.411 
## 10 ENSG0000000812… <NA>      <NA>                    413.                 0.231 
## # ℹ 13,408 more rows
## # ℹ abbreviated name: ¹​siMETTL2A_log2FoldChange
## # ℹ 22 more variables: siMETTL2A_lfcSE <dbl>, siMETTL2A_stat <dbl>,
## #   siMETTL2A_pvalue <dbl>, siMETTL2A_padj <dbl>, siMETTL2A_I_baseMean <dbl>,
## #   siMETTL2A_I_log2FoldChange <dbl>, siMETTL2A_I_lfcSE <dbl>,
## #   siMETTL2A_I_stat <dbl>, siMETTL2A_I_pvalue <dbl>, siMETTL2A_I_padj <dbl>,
## #   siMETTL2A_G_baseMean <dbl>, siMETTL2A_G_log2FoldChange <dbl>, …
# Number of genes per DET class (up / down / other) within each custom gene
# type, plus each class's share (%) of its gene type.
# `.by` groups only for the percentage step, so the stored table is an
# ungrouped tibble (it was previously left grouped by genetype2).
shortread_DESeq2_results_DETinfo_groupedby_genetypes <-
  shortread_DESeq2_results_DRSm3C_DETinfo |>
  count(common_DETs, genetype2) |>
  mutate(percentage = 100 * n / sum(n), .by = genetype2) |>
  arrange(genetype2)
# Print the summary for inspection.
shortread_DESeq2_results_DETinfo_groupedby_genetypes
## # A tibble: 14 × 4
## # Groups:   genetype2 [6]
##    common_DETs genetype2            n percentage
##    <chr>       <chr>            <int>      <dbl>
##  1 down        mRNA              2248     21.6  
##  2 other       mRNA              6150     59.1  
##  3 up          mRNA              2007     19.3  
##  4 other       mt-mRNA              4     30.8  
##  5 up          mt-mRNA              9     69.2  
##  6 up          mt-rRNA              2    100    
##  7 other       mt-tRNA              6     85.7  
##  8 up          mt-tRNA              1     14.3  
##  9 down        other ncRNAs       168      5.91 
## 10 other       other ncRNAs      2411     84.8  
## 11 up          other ncRNAs       264      9.29 
## 12 down        unannotated gene     1      0.676
## 13 other       unannotated gene   140     94.6  
## 14 up          unannotated gene     7      4.73
# Number of genes per DET class (up / down / other) within each methylation
# status ('+' / '-'), plus each class's share (%) of its methylation status.
# `.by` groups only for the percentage step, so the stored table is an
# ungrouped tibble (it was previously left grouped by methylation).
shortread_DESeq2_results_DETinfo_groupedby_methylation <-
  shortread_DESeq2_results_DRSm3C_DETinfo |>
  count(common_DETs, methylation) |>
  mutate(percentage = 100 * n / sum(n), .by = methylation) |>
  arrange(methylation)
# Print the summary for inspection.
shortread_DESeq2_results_DETinfo_groupedby_methylation
## # A tibble: 6 × 4
## # Groups:   methylation [2]
##   common_DETs methylation     n percentage
##   <chr>       <chr>       <int>      <dbl>
## 1 down        +               6       7.59
## 2 other       +              50      63.3 
## 3 up          +              23      29.1 
## 4 down        -            2411      18.1 
## 5 other       -            8661      64.9 
## 6 up          -            2267      17.0
# Number of genes per DET class (up / down / other) for every combination of
# methylation status and custom gene type, plus each class's share (%) of
# its methylation-by-gene-type cell.
# `.by` groups only for the percentage step, so the stored table is an
# ungrouped tibble (it was previously left grouped by both variables).
shortread_DESeq2_results_DETinfo_groupedby_genetypes_methylation <-
  shortread_DESeq2_results_DRSm3C_DETinfo |>
  count(common_DETs, methylation, genetype2) |>
  mutate(
    percentage = 100 * n / sum(n),
    .by = c(methylation, genetype2)
  ) |>
  arrange(methylation, genetype2)
# Print the summary for inspection.
shortread_DESeq2_results_DETinfo_groupedby_genetypes_methylation
## # A tibble: 19 × 5
## # Groups:   methylation, genetype2 [8]
##    common_DETs methylation genetype2            n percentage
##    <chr>       <chr>       <chr>            <int>      <dbl>
##  1 down        +           mRNA                 6      8.82 
##  2 other       +           mRNA                48     70.6  
##  3 up          +           mRNA                14     20.6  
##  4 other       +           mt-mRNA              2     22.2  
##  5 up          +           mt-mRNA              7     77.8  
##  6 up          +           mt-rRNA              2    100    
##  7 down        -           mRNA              2242     21.7  
##  8 other       -           mRNA              6102     59.0  
##  9 up          -           mRNA              1993     19.3  
## 10 other       -           mt-mRNA              2     50    
## 11 up          -           mt-mRNA              2     50    
## 12 other       -           mt-tRNA              6     85.7  
## 13 up          -           mt-tRNA              1     14.3  
## 14 down        -           other ncRNAs       168      5.91 
## 15 other       -           other ncRNAs      2411     84.8  
## 16 up          -           other ncRNAs       264      9.29 
## 17 down        -           unannotated gene     1      0.676
## 18 other       -           unannotated gene   140     94.6  
## 19 up          -           unannotated gene     7      4.73

# Plot ----

# Horizontal 100%-stacked bars: composition of DET classes per gene type.
# Colours are matched to the classes by name, so each class keeps its colour
# even if one of them is absent from the data (an unnamed vector is matched
# by position and would shift silently).
shortread_DESeq2_results_DETinfo_groupedby_genetypes_barplot <-
  shortread_DESeq2_results_DETinfo_groupedby_genetypes |>
  ggplot(aes(x = fct_rev(genetype2), y = n, fill = common_DETs)) +
  geom_col(position = position_fill()) +
  scale_y_reverse() +
  scale_fill_manual(
    values = c(down = '#3e3ef2', other = 'grey', up = '#f23e3e')
  ) +
  coord_flip()
# Save the figure into figdir.
shortread_DESeq2_results_DETinfo_groupedby_genetypes_barplot |>
  ggsave_multiple_formats(
    width = 5, height = 5, fontsize = 7, outdir = figdir
  )

# Horizontal 100%-stacked bars: composition of DET classes per methylation
# status. Colours are matched to the classes by name, so each class keeps its
# colour even if one of them is absent from the data (an unnamed vector is
# matched by position and would shift silently).
shortread_DESeq2_results_DETinfo_groupedby_methylation_barplot <-
  shortread_DESeq2_results_DETinfo_groupedby_methylation |>
  ggplot(aes(x = methylation, y = n, fill = common_DETs)) +
  geom_col(position = position_fill()) +
  scale_y_reverse() +
  scale_fill_manual(
    values = c(down = '#3e3ef2', other = 'grey', up = '#f23e3e')
  ) +
  coord_flip()
# Save the figure into figdir.
shortread_DESeq2_results_DETinfo_groupedby_methylation_barplot |>
  ggsave_multiple_formats(
    width = 3.5, height = 2.5, fontsize = 7, outdir = figdir
  )

# Horizontal 100%-stacked bars: composition of DET classes for each
# methylation-status x gene-type combination. interaction() joins the two
# labels with '.', which the nested axis guide splits again (delim = '.').
# Colours are matched to the classes by name, so each class keeps its colour
# even if one of them is absent from the data (an unnamed vector is matched
# by position and would shift silently).
shortread_DESeq2_results_DETinfo_groupedby_genetypes_methylation_barplot <-
  shortread_DESeq2_results_DETinfo_groupedby_genetypes_methylation |>
  ggplot(
    aes(
      x = interaction(methylation, genetype2),
      y = n,
      fill = common_DETs
    )
  ) +
  geom_col(position = position_fill()) +
  scale_y_reverse() +
  scale_x_discrete(guide = ggh4x::guide_axis_nested(delim = '.')) +
  scale_fill_manual(
    values = c(down = '#3e3ef2', other = 'grey', up = '#f23e3e')
  ) +
  coord_flip()
# Save the figure into figdir.
shortread_DESeq2_results_DETinfo_groupedby_genetypes_methylation_barplot |>
  ggsave_multiple_formats(
    width = 6, height = 6, fontsize = 7, outdir = figdir
  )
## Warning: The S3 guide system was deprecated in ggplot2 3.5.0.
## ℹ It has been replaced by a ggproto system that can be extended.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.